# Load the oil cost data (columns: date, cost, nominal).
cost <- read_csv("../data/MacroTrends_Data_Download.csv")
## Rows: 911 Columns: 3
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): date
## dbl (2): cost, nominal
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# head(cost)
# Keep only the cost column and convert it to a monthly time series
# starting January 1946 (frequency = 12).
# Fixes: dropped the spurious empty positional argument in ts(, start = ...)
# and the redundant self-rename select(cost = cost).
cost_ts <- cost %>%
  select(cost) %>%
  ts(start = c(1946, 1), frequency = 12)
# Interactive series plot with a range slider (TSstudio).
ts_plot(cost_ts,
        title = "World Yearly Oil Cost ",
        Ytitle = "Cost in EJ",
        Xtitle = "Years",
        slider = TRUE)
# Interactive dygraph of the series with a draggable range selector.
dygraph(
  cost_ts,
  main = "World Yearly Oil Cost ",
  ylab = "Cost in EJ",
  xlab = "Years"
) %>%
  dyRangeSelector()

# Diagnostic views: trend/seasonal decomposition, autocorrelation,
# and a seasonal plot of the raw series.
ts_decompose(cost_ts)
ts_cor(cost_ts)
ts_seasonal(cost_ts, type = "normal")
# Second-order differencing (equivalent to diff(diff(cost_ts)))
# to remove trend before inspecting seasonality.
dy <- diff(cost_ts, differences = 2)
autoplot(dy)

# autoplot(diff(cost_ts))
ggseasonplot(dy)

ggsubseriesplot(dy)

# Forecasting applications
# Split the series into training and testing partitions:
# the last 12 observations are held out for testing.
split_ts <- ts_split(ts.obj = cost_ts, sample.out = 12)
train <- split_ts$train
test <- split_ts$test
# Fit an ARIMA model on the training partition, forcing one
# non-seasonal difference (d = 1).
library(forecast)
md <- auto.arima(train, d = 1)
summary(md)
## Series: train
## ARIMA(1,1,0)(0,0,2)[12]
##
## Coefficients:
## ar1 sma1 sma2
## 0.2000 -0.0715 -0.0605
## s.e. 0.0328 0.0341 0.0335
##
## sigma^2 estimated as 21.85: log likelihood=-2657.62
## AIC=5323.24 AICc=5323.29 BIC=5342.44
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.03426171 4.664044 2.533212 -0.1618761 4.375093 0.2565892
## ACF1
## Training set 0.0004869705
# 12-step-ahead forecast from the fitted ARIMA model.
fc <- forecast(md, h = 12)
# time(fc)
# cycle(fc)
# class(fc)
# Plotting actual vs. fitted and forecasted
# Overlay the forecast on the full series against the held-out test set.
test_forecast(actual = cost_ts, forecast.obj = fc, test = test)
plot_forecast(fc)
# Run horse race between multiple models
# Candidate models: two ETS variants (different optimization criteria),
# a non-seasonal and a seasonal ARIMA, Holt-Winters, and a linear
# model on trend + season.
methods <- list(
  ets1 = list(
    method = "ets",
    method_arg = list(opt.crit = "lik"),
    notes = "ETS model with opt.crit = lik"
  ),
  ets2 = list(
    method = "ets",
    method_arg = list(opt.crit = "amse"),
    notes = "ETS model with opt.crit = amse"
  ),
  arima1 = list(
    method = "arima",
    method_arg = list(order = c(2, 1, 0)),
    notes = "ARIMA(2,1,0)"
  ),
  arima2 = list(
    method = "arima",
    method_arg = list(
      order = c(2, 1, 2),
      seasonal = list(order = c(1, 1, 1))
    ),
    notes = "SARIMA(2,1,2)(1,1,1)"
  ),
  hw = list(
    method = "HoltWinters",
    method_arg = NULL,
    notes = "HoltWinters Model"
  ),
  tslm = list(
    method = "tslm",
    method_arg = list(formula = input ~ trend + season),
    notes = "tslm model with trend and seasonal components"
  )
)
# Training the models with backtesting:
# 6 backtesting partitions, each holding out 12 observations,
# spaced 3 periods apart; models are ranked by MAPE over a
# 12-step forecast horizon.
md <- train_model(
  input = cost_ts,
  methods = methods,
  train_method = list(partitions = 6, sample.out = 12, space = 3),
  horizon = 12,
  error = "MAPE"
)
## Warning in (function (x, alpha = NULL, beta = NULL, gamma = NULL, seasonal =
## c("additive", : optimization difficulties: ERROR: ABNORMAL_TERMINATION_IN_LNSRCH
## Warning in (function (x, alpha = NULL, beta = NULL, gamma = NULL, seasonal =
## c("additive", : optimization difficulties: ERROR: ABNORMAL_TERMINATION_IN_LNSRCH
## # A tibble: 6 x 7
## model_id model notes avg_mape avg_rmse `avg_coverage_8~ `avg_coverage_9~
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 arima1 arima ARIM~ 0.334 17.3 0.75 0.917
## 2 arima2 arima SARI~ 0.344 17.2 0.75 0.917
## 3 ets2 ets ETS ~ 0.359 18.4 0.5 0.75
## 4 ets1 ets ETS ~ 0.365 18.1 0.528 0.806
## 5 hw HoltWinters Holt~ 0.370 18.7 0.542 0.889
## 6 tslm tslm tslm~ 0.784 32.7 0.514 0.917
plot_model(md)

# Tidy year/month/cost view of the raw series (currently unused below;
# kept for ad-hoc inspection).
Cost_df <- data.frame(
  year = floor(time(cost_ts)),
  month = cycle(cost_ts),
  cost = as.numeric(cost_ts)
)

plot_forecast(fc)
# fc$model$model$# %>% str()

# Splice the in-sample fitted values (column 3) and the out-of-sample
# point forecasts (column 4) into one series for plotting.
# NOTE: variable name `forcast` (sic) kept for compatibility with any
# downstream code that references it.
forcast <- fortify(fc)
# The point-forecast column is NA for historical rows, so locate the
# forecast start from the data instead of hard-coding rows 899/900 —
# the old hard-coded indices would silently break if the series length
# or forecast horizon ever changed.
fc_start <- which(!is.na(forcast[[4]]))[1]
cost_df <- data.frame(
  date = forcast$Index,
  cost = c(forcast[seq_len(fc_start - 1), 3],
           forcast[fc_start:nrow(forcast), 4])
)
cost_df %>%
  ggplot(aes(x = date, y = cost)) +
  geom_line()
